Fit temperature models and predict growing season temperature

Author

Max Lindmark, Jan Ohlberger, Anna Gårdmark

Published

March 13, 2025

Load libraries

library(here)
here() starts at /Users/maxlindmark/Dropbox/Max work/R/perch-growth
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidylog)

Attaching package: 'tidylog'

The following objects are masked from 'package:dplyr':

    add_count, add_tally, anti_join, count, distinct, distinct_all,
    distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
    full_join, group_by, group_by_all, group_by_at, group_by_if,
    inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
    relocate, rename, rename_all, rename_at, rename_if, rename_with,
    right_join, sample_frac, sample_n, select, select_all, select_at,
    select_if, semi_join, slice, slice_head, slice_max, slice_min,
    slice_sample, slice_tail, summarise, summarise_all, summarise_at,
    summarise_if, summarize, summarize_all, summarize_at, summarize_if,
    tally, top_frac, top_n, transmute, transmute_all, transmute_at,
    transmute_if, ungroup

The following objects are masked from 'package:tidyr':

    drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
    spread, uncount

The following object is masked from 'package:stats':

    filter
library(RColorBrewer)
library(viridis)
Loading required package: viridisLite
library(sdmTMB)
library(sdmTMBextra)

Attaching package: 'sdmTMBextra'

The following objects are masked from 'package:sdmTMB':

    add_barrier_mesh, dharma_residuals, extract_mcmc
library(patchwork)
Warning: package 'patchwork' was built under R version 4.3.3
library(RCurl)

Attaching package: 'RCurl'

The following object is masked from 'package:tidyr':

    complete
library(tidylog)

# devtools::install_github("seananderson/ggsidekick") # not on CRAN
library(ggsidekick)
theme_set(theme_sleek())

# Set path:
home <- here::here()

Load cache

# qwraps2::lazyload_cache_dir(path = paste0(home, "/R/analyze-data/01-fit-temp-models-predict_cache/html"))

Read data

# Daily temperature observations (loggers + other sources) used to fit the
# area-specific temperature models
d <- read_csv(paste0(home, "/data/clean/temp_data_for_fitting.csv"))
Rows: 98616 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): area, source, db, id
dbl  (8): year, temp, yday, month, day, VkDag, stn_nr, section_nr
date (1): date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Factor versions of area, source and year for the model formula; drop areas
# that cannot be used (VN: no logger data, TH: too short time series)
d <- d |>
  mutate(
    area = as.factor(area),
    across(c(source, year), as.factor, .names = "{.col}_f")
  ) |>
  filter(!area %in% c("VN", "TH"))
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
filter: removed 5,545 rows (6%), 93,071 rows remaining
# Back-calculated growth data; used only to keep track of which years/cohorts
# have growth observations, for matching temperature predictions to cohorts
gdat <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/perch-growth/master/data/clean/dat.csv")
Rows: 338460 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): age_ring, area, gear, ID, sex, keep
dbl (6): length_mm, age_bc, age_catch, catch_year, cohort, final_length

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Unique area x cohort x age combinations; used below to drop sparse groups
gdat$area_cohort_age <- as.factor(with(gdat, paste(area, cohort, age_bc)))

# Areas ranked by their mean temperature (used for plotting order)
order <- read_csv(paste0(home, "/output/ranked_temps.csv"))
Rows: 10 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): area
dbl (1): mean_temp

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mark the heated areas (SI_HA, BT) with an asterisk in the plot labels
order <- order |>
  mutate(area2 = if_else(area %in% c("SI_HA", "BT"), paste0(area, "*"), area))
mutate: new variable 'area2' (character) with 10 unique values and 0% NA
# Per area: the first and last cohort with usable growth data. A group is
# usable when the area-cohort-age combination has more than 10 fish and the
# fish were older than 3 at catch. (Both conditions are evaluated on the
# original groups, so combining them in one filter() is equivalent to the
# two-step version.)
gdat <- gdat |>
  group_by(area_cohort_age) |>
  filter(n() > 10, age_catch > 3) |>
  group_by(area) |>
  summarise(min = min(cohort), max = max(cohort)) |>
  arrange(min)
group_by: one grouping variable (area_cohort_age)
filter (grouped): removed 5,190 rows (2%), 333,270 rows remaining
filter (grouped): removed 107,058 rows (32%), 226,212 rows remaining
group_by: one grouping variable (area)
summarise: now 12 rows and 3 columns, ungrouped
# Attach each area's first/last growth cohort (columns `min`, `max`) and flag
# the years for which growth data exist
d <- d |>
  left_join(gdat, by = "area") |>
  mutate(
    area = as.factor(area),
    growth_dat = if_else(year >= min & year <= max, "Y", "N")
  )
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (     2)
           > matched rows     93,071
           >                 ========
           > rows total       93,071
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
# Drop data in the heated areas SI_HA and BT from before the onset of warming
# (BT: nuclear plant effluent from 1981; SI_HA: from 1973)
d <- d |>
  mutate(discard = case_when(
    area == "BT" & year <= 1980 ~ "Y",
    area == "SI_HA" & year <= 1972 ~ "Y",
    .default = "N"
  )) |>
  filter(discard == "N")
mutate: new variable 'discard' (character) with 2 unique values and 0% NA
filter: removed 1,146 rows (1%), 91,925 rows remaining
# Drop the heated areas for the full (unheated) plot
df <- d |> filter(!area %in% c("BT", "SI_HA"))

# Build the area palette with two extra colors, then drop the two brightest
# middle colors (indices 6, 7) that are hard to read
nareas <- length(unique(order$area)) + 2
colors <- colorRampPalette(brewer.pal(name = "RdYlBu", n = 10))(nareas)[-c(6, 7)]

Plot data

# Data coverage per site: which years have temperature data, and from which
# source, restricted to the years with matching growth data
df |>
  filter(growth_dat == "Y") |>
  distinct(area, year, source) |>
  ggplot(aes(x = year, y = area, color = source)) +
  geom_point(position = position_dodge(width = 0.4), shape = 15) +
  scale_color_brewer(palette = "Dark2") +
  labs(y = "Site", x = "Year", color = "Source") +
  theme(legend.position = "bottom")

ggsave(paste0(home, "/figures/supp/temp_sources.pdf"), width = 15, height = 11, units = "cm")

Area-specific models

# Fit one temperature model per area: daily temperature ~ source + year
# (factors) + a cyclic smooth over day of year. Student-t likelihoods
# downweight outliers; the degrees of freedom differ by area (df = 6 vs
# df = 10). For each area we store MCMC residuals and daily predictions.
spec_preds <- list()
res_list <- list()

for (i in unique(d$area)) {
  dd <- d |> filter(area == i)

  if (unique(dd$area) %in% c("BS", "BT", "FB", "FM", "MU", "SI_EK")) { # RA, JM, HO, SI_HA remain

    mspec <- sdmTMB(temp ~ 0 + source_f + year_f + s(yday, bs = "cc"),
      data = dd,
      family = student(df = 6),
      spatial = "off",
      spatiotemporal = "off",
      knots = list(yday = c(0.5, 364.5)), # cyclic smooth: end of year wraps to start
      control = sdmTMBcontrol(newton_loops = 1)
    )
  } else {
    mspec <- sdmTMB(temp ~ 0 + source_f + year_f + s(yday, bs = "cc"),
      data = dd,
      family = student(df = 10),
      spatial = "off",
      spatiotemporal = "off",
      knots = list(yday = c(0.5, 364.5)),
      control = sdmTMBcontrol(newton_loops = 1)
    )
  }

  sanity(mspec)

  # MCMC-based randomized quantile residuals
  mcmc_res_msep <- residuals(mspec,
    type = "mle-mcmc",
    mcmc_samples = sdmTMBextra::predict_mle_mcmc(mspec,
      mcmc_iter = 201,
      mcmc_warmup = 200
    )
  )

  dd$res <- as.vector(mcmc_res_msep)

  # Store residuals
  res_list[[i]] <- dd

  # QQ plot per area. Map the data column `res` rather than the loose vector
  # `mcmc_res_msep`: aes() should draw from the plot data, not rely on
  # environment lookup (fragile if ggplot subsets or reorders the data)
  print(ggplot(dd, aes(sample = res)) +
    stat_qq(size = 0.75, shape = 21, fill = NA) +
    stat_qq_line() +
    labs(y = "Sample Quantiles", x = "Theoretical Quantiles", title = paste("Site = ", i)) +
    theme(aspect.ratio = 1))

  # Predict daily temperature on a grid of all observed days, sources and
  # years in this area, and flag the years with matching growth data
  nd_area <- data.frame(expand.grid(
    yday = seq(min(dd$yday), max(dd$yday), by = 1),
    area = unique(dd$area),
    source = unique(dd$source_f),
    year = unique(dd$year)
  )) |>
    mutate(
      source_f = as.factor(source),
      year_f = as.factor(year)
    ) |>
    left_join(gdat, by = "area") |>
    mutate(
      area = as.factor(area),
      growth_dat = ifelse(year >= min & year <= max, "Y", "N")
    )

  nd_area$pred <- predict(mspec, newdata = nd_area)$est

  # Save!
  spec_preds[[i]] <- nd_area
}
filter: removed 88,464 rows (96%), 3,461 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf

SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000276 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.76 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 1.921 seconds (Warm-up)
Chain 1:                0.004 seconds (Sampling)
Chain 1:                1.925 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     83,664
           >                 ========
           > rows total       83,664
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 82,953 rows (90%), 8,972 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000703 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 7.03 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 3.357 seconds (Warm-up)
Chain 1:                0.006 seconds (Sampling)
Chain 1:                3.363 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 42 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     46,116
           >                 ========
           > rows total       46,116
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 82,452 rows (90%), 9,473 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000888 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 8.88 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 3.994 seconds (Warm-up)
Chain 1:                0.007 seconds (Sampling)
Chain 1:                4.001 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     90,885
           >                 ========
           > rows total       90,885
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 79,957 rows (87%), 11,968 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.001178 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 11.78 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 5.033 seconds (Warm-up)
Chain 1:                0.007 seconds (Sampling)
Chain 1:                5.04 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     91,134
           >                 ========
           > rows total       91,134
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 83,763 rows (91%), 8,162 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000634 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 6.34 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 3.4 seconds (Warm-up)
Chain 1:                0.004 seconds (Sampling)
Chain 1:                3.404 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     90,885
           >                 ========
           > rows total       90,885
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 81,791 rows (89%), 10,134 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000752 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 7.52 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 3.572 seconds (Warm-up)
Chain 1:                0.011 seconds (Sampling)
Chain 1:                3.583 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     84,660
           >                 ========
           > rows total       84,660
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 81,040 rows (88%), 10,885 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.00082 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 8.2 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 4.57 seconds (Warm-up)
Chain 1:                0.007 seconds (Sampling)
Chain 1:                4.577 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     91,134
           >                 ========
           > rows total       91,134
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 87,105 rows (95%), 4,820 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000385 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.85 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 2.179 seconds (Warm-up)
Chain 1:                0.003 seconds (Sampling)
Chain 1:                2.182 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     91,134
           >                 ========
           > rows total       91,134
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 83,052 rows (90%), 8,873 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.001085 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 10.85 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 3.431 seconds (Warm-up)
Chain 1:                0.007 seconds (Sampling)
Chain 1:                3.438 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 83 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     91,134
           >                 ========
           > rows total       91,134
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA
filter: removed 76,748 rows (83%), 15,177 rows remaining
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf
Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
Inf
Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
-Inf


SAMPLING FOR MODEL 'tmb_generic' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.001153 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 11.53 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 201 [  0%]  (Warmup)
Chain 1: Iteration:  20 / 201 [  9%]  (Warmup)
Chain 1: Iteration:  40 / 201 [ 19%]  (Warmup)
Chain 1: Iteration:  60 / 201 [ 29%]  (Warmup)
Chain 1: Iteration:  80 / 201 [ 39%]  (Warmup)
Chain 1: Iteration: 100 / 201 [ 49%]  (Warmup)
Chain 1: Iteration: 120 / 201 [ 59%]  (Warmup)
Chain 1: Iteration: 140 / 201 [ 69%]  (Warmup)
Chain 1: Iteration: 160 / 201 [ 79%]  (Warmup)
Chain 1: Iteration: 180 / 201 [ 89%]  (Warmup)
Chain 1: Iteration: 200 / 201 [ 99%]  (Warmup)
Chain 1: Iteration: 201 / 201 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 6.127 seconds (Warm-up)
Chain 1:                0.004 seconds (Sampling)
Chain 1:                6.131 seconds (Total)
Chain 1: 
mutate: new variable 'source_f' (factor) with 3 unique values and 0% NA
        new variable 'year_f' (factor) with 50 unique values and 0% NA
left_join: added 2 columns (min, max)
           > rows only in x        0
           > rows only in y  (    11)
           > matched rows     54,900
           >                 ========
           > rows total       54,900
mutate: converted 'area' from character to factor (0 new NA)
        new variable 'growth_dat' (character) with 2 unique values and 0% NA

# Collect the per-area predictions and residuals into single data frames
nd_area <- dplyr::bind_rows(spec_preds)
area_res <- dplyr::bind_rows(res_list)

# Plot residuals. Facet labels give the Student-t degrees of freedom used for
# each area's model. These must match the if-branch in the fitting loop:
# BS, BT, FB, FM, MU, SI_EK were fit with df = 6 (6 areas); RA, JM, HO, SI_HA
# with df = 10 (4 areas). (The previous 7/3 split mislabeled RA as nu = 6.)
dfs <- data.frame(
  area = c("BS", "BT", "FB", "FM", "MU", "SI_EK", "RA", "JM", "HO", "SI_HA"),
  df = c(rep("\u03BD = 6", 6), rep("\u03BD = 10", 4))
)

# Faceted QQ plots of the MCMC residuals, one panel per area, annotated in the
# top-left corner with the Student-t degrees of freedom from `dfs`
area_res |>
  ggplot(aes(sample = res)) +
  stat_qq(size = 0.75, shape = 21, fill = NA) +
  facet_wrap(~area) +
  stat_qq_line() +
  labs(y = "Sample Quantiles", x = "Theoretical Quantiles") +
  geom_text(
    data = dfs, aes(label = df, hjust = -0.1, vjust = 1.25), inherit.aes = FALSE,
    x = -Inf, y = Inf, size = 2.6, color = "grey20"
  ) +
  theme(aspect.ratio = 1)

# cairo_pdf is needed to render the Greek nu in the facet annotations
ggsave(paste0(home, "/figures/supp/qq_temp_area.pdf"), width = 17, height = 17, units = "cm", device = cairo_pdf)

# The heated areas have no pre-warming data to inform a (cooler) early
# temperature series. Borrow the nearby reference areas instead: FM stands in
# for BT before 1981 and SI_EK for SI_HA before 1973, relabelling the
# borrowed rows accordingly.
nd_area_sub <- nd_area |>
  mutate(
    keep = "N",
    keep = ifelse(area == "FM" & year <= 1980, "Y", keep), # use FM instead of BT
    keep = ifelse(area == "SI_EK" & year <= 1972, "Y", keep) # use SI_EK instead of SI_HA
  ) |>
  filter(keep == "Y") |> # Now change the labels to BT and SI_HA...
  # Relabel explicitly by source area: safer than an else-branch, which would
  # silently tag any unexpected area as SI_HA
  mutate(area = case_when(
    area == "FM" ~ "BT",
    area == "SI_EK" ~ "SI_HA"
  ))
mutate: new variable 'keep' (character) with 2 unique values and 0% NA
filter: removed 734,394 rows (90%), 81,252 rows remaining
mutate: converted 'area' from factor to character (0 new NA)
# Bind rows and plot only the temperature series we will use for growth modelling.
# SI_EK and SI_HA do not have the same starting years, so we can't use all
# columns from SI_EK; flag the two extra SI_HA years as growth data
nd_area <- bind_rows(nd_area, nd_area_sub) |>
  select(-keep) |>
  mutate(growth_dat = if_else(area == "SI_HA" & year %in% c(1966, 1967), "Y", growth_dat))
select: dropped one variable (keep)
mutate: changed 2,196 values (<1%) of 'growth_dat' (0 new NA)
# Sanity check the borrowed series: in the pre-data years FM should mirror BT
# and SI_EK should mirror SI_HA
nd_area |>
  filter(
    area %in% c("FM", "BT", "SI_EK", "SI_HA"),
    between(year, 1966, 1980)
  ) |>
  group_by(area, year) |>
  summarise(mean_temp = mean(pred)) |>
  ungroup() |>
  pivot_wider(names_from = area, values_from = mean_temp)
filter: removed 532,362 rows (59%), 364,536 rows remaining
filter: removed 298,656 rows (82%), 65,880 rows remaining
group_by: 2 grouping variables (area, year)
summarise: now 60 rows and 3 columns, one group variable remaining (area)
ungroup: no grouping variables
pivot_wider: reorganized (area, mean_temp) into (BT, FM, SI_EK, SI_HA) [was 60x3, now 15x5]
# A tibble: 15 × 5
    year    BT    FM SI_EK SI_HA
   <dbl> <dbl> <dbl> <dbl> <dbl>
 1  1966  8.15  8.15  7.97  7.97
 2  1967  8.98  8.98  8.76  8.76
 3  1968  8.69  8.69  8.37  8.37
 4  1969  8.55  8.55  8.53  8.53
 5  1970  8.30  8.30  8.04  8.04
 6  1971  8.33  8.33  8.33  8.33
 7  1972  8.83  8.83  8.90  8.90
 8  1973  9.19  9.19  8.99 11.1 
 9  1974  9.05  9.05  8.66  8.46
10  1975  7.43  7.43  9.34 14.6 
11  1976  7.09  7.09  8.00 13.7 
12  1977  5.61  5.61  7.80 13.0 
13  1978  5.59  5.59  7.57 13.1 
14  1979  5.86  5.86  7.42 11.4 
15  1980  6.96  6.96  7.73 13.4 

Plot detailed exploration of predictions

# Loop through all areas, plot temperature as a function of yday, color by data source, facet by year

nd_area |>
  # Asterisk marks areas whose early years are borrowed from a neighbouring area
  mutate(area = ifelse(area %in% c("SI_HA", "BT"), paste0(area, "*"), area)) |> 
  ggplot(aes(yday, y = year, fill = pred, color = pred)) +
  geom_raster() +
  facet_wrap(~area, ncol = 4) +
  scale_fill_viridis(option = "magma") +
  scale_color_viridis(option = "magma") +
  labs(x = "Day-of-the-year", y = "Year", color = "Predicted SST (°C)", fill = "Predicted SST (°C)") +
  # A numeric legend.position was deprecated in ggplot2 3.5.0; use the
  # "inside" position together with legend.position.inside instead
  theme(legend.position = "inside", legend.position.inside = c(0.78, 0.14))
mutate: changed 182,268 values (20%) of 'area' (0 new NA)
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.

# Save a supplement-sized and a talk-sized version of the raster figure
ggsave(paste0(home, "/figures/supp/temp_pred_yday_area.pdf"), width = 17, height = 17, units = "cm")
ggsave(paste0(home, "/figures/for-talks/temp_pred_yday_area.pdf"), width = 14, height = 14, units = "cm")

# One diagnostic figure per area: model predictions (lines) overlaid on the
# raw temperature observations (points), facetted by year
for (area_i in unique(nd_area$area)) {
  plotdat <- filter(nd_area, area == area_i)

  p <- ggplot(plotdat, aes(yday, pred, color = source)) +
    scale_color_brewer(palette = "Dark2") +
    facet_wrap(~year) +
    geom_point(
      data = filter(d, area == area_i & year > min(plotdat$year)), size = 0.2,
      aes(yday, temp, color = source)
    ) +
    geom_line(linewidth = 0.3) +
    guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
    theme_sleek(base_size = 8) +
    theme(
      legend.position = "bottom",
      legend.direction = "horizontal",
      legend.spacing.y = unit(-0.3, "cm")
    ) +
    labs(
      title = paste("Site = ", area_i),
      x = "Day of the year", y = "Predicted SST (°C)",
      color = "", linetype = ""
    )
  print(p)

  ggsave(paste0(home, "/figures/supp/temp_pred_yday_area_", area_i, ".pdf"), width = 17, height = 17, units = "cm")
}
filter: removed 813,234 rows (91%), 83,664 rows remaining
filter: removed 88,476 rows (96%), 3,449 rows remaining
filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 82,953 rows (90%), 8,972 rows remaining

filter: removed 806,013 rows (90%), 90,885 rows remaining
filter: removed 82,464 rows (90%), 9,461 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 79,969 rows (87%), 11,956 rows remaining

filter: removed 806,013 rows (90%), 90,885 rows remaining
filter: removed 83,775 rows (91%), 8,150 rows remaining

filter: removed 812,238 rows (91%), 84,660 rows remaining
filter: removed 81,803 rows (89%), 10,122 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 81,052 rows (88%), 10,873 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 87,117 rows (95%), 4,808 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 83,064 rows (90%), 8,861 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 76,748 rows (83%), 15,177 rows remaining

# Same figure, but restricted to the years actually used for growth modelling
for (area_i in unique(nd_area$area)) {
  plotdat <- nd_area |>
    filter(area == area_i) |>
    filter(growth_dat == "Y")

  p <- ggplot(plotdat, aes(yday, pred, color = source)) +
    scale_color_brewer(palette = "Dark2") +
    facet_wrap(~year) +
    geom_point(
      data = filter(d, area == area_i & year > min(plotdat$year)), size = 0.2,
      aes(yday, temp, color = source)
    ) +
    geom_line(linewidth = 0.3) +
    guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
    theme_sleek(base_size = 8) +
    theme(
      legend.position = "bottom",
      legend.direction = "horizontal",
      legend.spacing.y = unit(-0.3, "cm")
    ) +
    labs(
      title = paste("Site = ", area_i),
      x = "Day of the year", y = "Predicted SST (°C)",
      color = "", linetype = ""
    )
  print(p)

  ggsave(paste0(home, "/figures/supp/temp_pred_yday_area_trimmed_", area_i, ".pdf"), width = 17, height = 17, units = "cm")
}
filter: removed 813,234 rows (91%), 83,664 rows remaining
filter: removed 65,520 rows (78%), 18,144 rows remaining
filter: removed 89,016 rows (97%), 2,909 rows remaining
filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 49,410 rows (54%), 41,724 rows remaining
filter: removed 82,953 rows (90%), 8,972 rows remaining

filter: removed 806,013 rows (90%), 90,885 rows remaining
filter: removed 38,325 rows (42%), 52,560 rows remaining
filter: removed 82,812 rows (90%), 9,113 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 49,410 rows (54%), 41,724 rows remaining
filter: removed 80,245 rows (87%), 11,680 rows remaining

filter: removed 806,013 rows (90%), 90,885 rows remaining
filter: removed 52,560 rows (58%), 38,325 rows remaining
filter: removed 84,279 rows (92%), 7,646 rows remaining

filter: removed 812,238 rows (91%), 84,660 rows remaining
filter: removed 19,380 rows (23%), 65,280 rows remaining
filter: removed 81,959 rows (89%), 9,966 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 69,174 rows (76%), 21,960 rows remaining
filter: removed 81,544 rows (89%), 10,381 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 66,978 rows (73%), 24,156 rows remaining
filter: removed 87,657 rows (95%), 4,268 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 38,430 rows (42%), 52,704 rows remaining
filter: removed 83,400 rows (91%), 8,525 rows remaining

filter: removed 805,764 rows (90%), 91,134 rows remaining
filter: removed 37,332 rows (41%), 53,802 rows remaining
filter: removed 76,748 rows (83%), 15,177 rows remaining

Plot monthly temperatures by area to see how consistent the between-area differences are over the season

# Find the years in which all 10 areas have growth-model temperature series
nd_area |>
  filter(growth_dat == "Y") |>
  summarise(n = n_distinct(area), .by = year) |>
  filter(n == 10) |>
  arrange(desc(year))
filter: removed 486,519 rows (54%), 410,379 rows remaining
summarise: now 64 rows and 2 columns, ungrouped
filter: removed 48 rows (75%), 16 rows remaining
   year  n
1  2000 10
2  1999 10
3  1998 10
4  1997 10
5  1996 10
6  1995 10
7  1994 10
8  1993 10
9  1992 10
10 1991 10
11 1990 10
12 1989 10
13 1988 10
14 1987 10
15 1986 10
16 1985 10
# Let's use 1985:2000 — the window in which all 10 areas overlap (see above)
seasonal <- nd_area |>
  filter(source == "logger", year %in% 1985:2000) |>
  summarise(temp = mean(pred), .by = c(area, yday))
filter: removed 597,932 rows (67%), 298,966 rows remaining
filter: removed 241,334 rows (81%), 57,632 rows remaining
summarise: now 3,602 rows and 3 columns, ungrouped
# Warm areas are consistently warmer
p1 <- ggplot(seasonal, aes(yday, temp, color = factor(area, levels = order$area))) + 
  geom_line() + 
  scale_color_manual(values = alpha(colors, alpha = 1), name = "Area") +  
  labs(x = "Day-of-the-year", 
       y = "Predicted SST (°C)") +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(nrow = 1, title.position = "top", title.hjust = 0.5)) +
  NULL

# Summer temperatures
p2 <- nd_area |>
  mutate(area2 = ifelse(area %in% c("SI_HA", "BT"), paste0(area, "*"), area)) |> 
  filter(source == "logger") |>
  filter(year %in% c(1985:2000)) |>
  mutate(date = as.Date(yday - 1, origin = "2000-01-01"),
         month = month(date)) |> 
  filter(month %in% c(6:8)) |>
  summarise(temp = mean(pred), .by = c(area2, year)) |>
  ggplot(aes(factor(area2, order$area2), temp, color = factor(area2, levels = order$area2))) + 
  geom_jitter(height = 0, width = 0.1) +
  geom_boxplot(width = 0.2, fill = NA) + 
  guides(color = "none") +
  scale_color_manual(values = alpha(colors, alpha = 1), name = "Area") +
  labs(y = "Predicted SST (°C)", 
       x = "Area") +
  NULL
mutate: new variable 'area2' (character) with 10 unique values and 0% NA
filter: removed 597,932 rows (67%), 298,966 rows remaining
filter: removed 241,334 rows (81%), 57,632 rows remaining
mutate: new variable 'date' (Date) with 366 unique values and 0% NA
        new variable 'month' (double) with 12 unique values and 0% NA
filter: removed 42,912 rows (74%), 14,720 rows remaining
summarise: now 160 rows and 3 columns, ungrouped
# Correlation between mean and summer temperatures
# Annual mean of the logger-source predictions, 1985-2000, per year and area.
# The original also derived date/month here, but summarise() drops them and
# nothing read them, so that dead step is removed.
annual_mean <- nd_area |>
  filter(source == "logger") |>
  filter(year %in% c(1985:2000)) |>
  summarise(annual_mean = mean(pred), .by = c(year, area))
filter: removed 597,932 rows (67%), 298,966 rows remaining
filter: removed 241,334 rows (81%), 57,632 rows remaining
mutate: new variable 'date' (Date) with 366 unique values and 0% NA
        new variable 'month' (double) with 12 unique values and 0% NA
summarise: now 160 rows and 3 columns, ungrouped
# Summer (June-August) mean of the logger-source predictions per year and area
summer_mean <- nd_area |>
  filter(source == "logger", year %in% 1985:2000) |>
  # Map yday onto a calendar month via a leap-year origin so yday 366 is valid
  mutate(
    date = as.Date(yday - 1, origin = "2000-01-01"),
    month = month(date)
  ) |>
  filter(month %in% 6:8) |>
  summarise(summer_mean = mean(pred), .by = c(year, area))
filter: removed 597,932 rows (67%), 298,966 rows remaining
filter: removed 241,334 rows (81%), 57,632 rows remaining
mutate: new variable 'date' (Date) with 366 unique values and 0% NA
        new variable 'month' (double) with 12 unique values and 0% NA
filter: removed 42,912 rows (74%), 14,720 rows remaining
summarise: now 160 rows and 3 columns, ungrouped
corr <- annual_mean |> left_join(summer_mean, by = c("year", "area"))
left_join: added one column (summer_mean)
           > rows only in x     0
           > rows only in y  (  0)
           > matched rows     160
           >                 =====
           > rows total       160
# Annual vs summer means, colored by area
p3 <- ggplot(corr, aes(summer_mean, annual_mean, color = area)) +
  geom_point() +
  scale_color_manual(values = alpha(colors, alpha = 1), name = "Area") +
  guides(color = "none") +
  labs(
    x = "Mean summer temperature (June - Aug) (°C)",
    y = "Annual mean temperature (°C)"
  )

# Seasonal curves on top; summer boxplots and the correlation panel below
p1 / (p2 | p3) + plot_annotation(tag_levels = "A")

ggsave(paste0(home, "/figures/supp/temperature_tests.pdf"), width = 19, height = 18, units = "cm")

Plot summarized data and predictions

# Summarise data: annual mean of observed temperature per area and source
dsum <- d |>
  group_by(year, area, source) |>
  # drop_last is the default for multiple groups; spelled out for clarity
  summarise(temp = mean(temp), .groups = "drop_last") |>
  mutate(type = "data")
group_by: 3 grouping variables (year, area, source)
summarise: now 1,376 rows and 4 columns, 2 group variables remaining (year, area)
mutate (grouped): new variable 'type' (character) with one unique value and 0% NA
# Annual mean of the model predictions for the growth-model series
preds_area <- nd_area |>
  filter(growth_dat == "Y", source == "logger") |>
  group_by(area, year) |>
  summarise(temp = mean(pred), .groups = "drop_last") |>
  mutate(model = "area model")
filter: removed 760,105 rows (85%), 136,793 rows remaining
group_by: 2 grouping variables (area, year)
summarise: now 380 rows and 3 columns, one group variable remaining (area)
mutate (grouped): new variable 'model' (character) with one unique value and 0% NA

Make final plot using the area-specific model

# Add latitude
# Build the coordinate table directly as a data.frame. The original wrapped
# the vectors in cbind(), which coerces everything to a character matrix and
# then needed mutate_at() to convert lat/lon back to numeric.
area <- c("BS", "BT", "FB", "FM", "HO", "JM", "MU", "RA", "SI_EK", "SI_HA")
nareas <- length(area)
lat <- c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4)
lon <- c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7)
area_attr <- data.frame(area = area, lat = lat, lon = lon) |>
  arrange(desc(lat)) # north to south
mutate_at: converted 'lat' from character to double (0 new NA)
           converted 'lon' from character to double (0 new NA)
# Areas ranked by mean temperature; asterisks mark the two areas whose early
# years are borrowed from a neighbouring series
order <- read_csv(paste0(home, "/output/ranked_temps.csv")) |>
  mutate(area2 = if_else(area %in% c("SI_HA", "BT"), paste0(area, "*"), area))
Rows: 10 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): area
dbl (1): mean_temp

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mutate: new variable 'area2' (character) with 10 unique values and 0% NA
# Final figure: annual average temperature per area, warmest area first
preds_area |>
  mutate(area2 = if_else(area %in% c("SI_HA", "BT"), paste0(area, "*"), area)) |>
  ggplot(aes(year, temp, color = temp)) +
  geom_line(linewidth = 0.6) +
  facet_wrap(~ factor(area2, levels = order$area2), ncol = 5) +
  scale_color_viridis(option = "magma", name = "Site") +
  guides(color = "none") +
  labs(x = "Year", y = "Model-predicted annual average temperature (°C)")
mutate (grouped): new variable 'area2' (character) with 10 unique values and 0% NA

ggsave(paste0(home, "/figures/annual_average_temperature.pdf"), width = 17, height = 9, units = "cm")

# Check year range covered by each area's prediction series
preds_area |>
  summarise(
    min = min(year),
    max = max(year),
    .by = area
  ) |>
  arrange(min)
group_by: one grouping variable (area)
summarise: now 10 rows and 3 columns, ungrouped
# A tibble: 10 × 3
   area    min   max
   <chr> <dbl> <dbl>
 1 JM     1953  2016
 2 BT     1963  2000
 3 FM     1963  2000
 4 SI_HA  1966  2014
 5 SI_EK  1968  2015
 6 FB     1969  2016
 7 MU     1981  2000
 8 HO     1982  2016
 9 BS     1985  2002
10 RA     1985  2006
# Check the range of predicted annual temperatures per area
preds_area |>
  summarise(
    min = min(temp),
    max = max(temp),
    .by = area
  ) |>
  arrange(min)
group_by: one grouping variable (area)
summarise: now 10 rows and 3 columns, ungrouped
# A tibble: 10 × 3
   area    min   max
   <chr> <dbl> <dbl>
 1 RA     3.06  9.56
 2 JM     3.37 11.8 
 3 HO     3.99  8.07
 4 FB     5.04 10.6 
 5 MU     5.16  8.57
 6 SI_EK  5.49 10.4 
 7 BT     5.87 16.6 
 8 FM     5.87 15.0 
 9 BS     6.22 11.8 
10 SI_HA  7.76 16.9 
# Save prediction df for downstream growth modelling
write_csv(preds_area, file.path(home, "output", "gam_predicted_temps.csv"))